home *** CD-ROM | disk | FTP | other *** search
/ Developer CD Series 2000 November: Tool Chest / Dev.CD Nov 00 TC Disk 1.toast / Sample Code / Contributed / SpriteWorld / SpriteWorld Files / BlitPixie / Sources / BlitPixieBlitErase.c < prev    next >
Encoding:
Text File  |  2000-10-06  |  7.1 KB  |  396 lines  |  [TEXT/CWIE]

  1. ///--------------------------------------------------------------------------------------
  2. //    BlitPixieBlitErase - cache-optimized screen blitter
  3. //
  4. //    written by Anders F Björklund <afb@algonet.se>
  5. //    ©2000 afb.
  6. ///--------------------------------------------------------------------------------------
  7.  
  8. #ifndef __BLITPIXIE__
  9. #include "BlitPixieHeader.h"
  10. #endif
  11.  
  12. #include "BlitPixieAsm.h"
  13.  
  14. #pragma mark *** PowerPC asm : 
  15. #if USE_PPC_ASSEMBLY
  16.  
  17. //    NOTE:    assumes dstRowBytes is multiple of 4 (for alignment purposes)
  18. //    NOTE:    assumes bytes, rows > 0
  19.  
  // BlitPixieBlitErase (PowerPC assembly version).
  //
  // For each of `rows` rows this routine moves `bytes` bytes from src to dst
  // and then overwrites those same src bytes with the bytes read from bkg.
  // At the end of a row, src and bkg both advance by srcRowBytes - bytes and
  // dst advances by dstRowBytes - bytes.
  //
  // Each row is handled in three stages. The stages are selected by
  // condition-register flags that are computed once, before the row loop:
  //   pre    - (-dst) & 31 bytes, moved in 1/2/4/8/16-byte pieces
  //   blocks - 32-byte blocks moved through fp1..fp8
  //   post   - (bytes - pre) & 31 bytes, moved in 16/8/4/2/1-byte pieces
  //
  // The same flags are reused for every row, so rows after the first start
  // their block stage on a 32-byte boundary of dst only when dstRowBytes is
  // a multiple of 32.
  //
  // NOTE(review): pre is derived from the dst address only and is never
  // compared against bytes. If bytes is smaller than pre, r_bytes goes
  // negative and the block count becomes very large - confirm that callers
  // never pass such a narrow width.
  20. ASM_FUNC void BlitPixieBlitErase(
  21.     register unsigned char *src,            // r3: rows that are saved to dst, then overwritten from bkg
  22.     register unsigned char *dst,            // r4: receives the bytes read from src
  23.     register unsigned char *bkg,            // r5: bytes written back over src
  24.     register unsigned long srcRowBytes,        // r6: row pitch applied to both src and bkg
  25.     register unsigned long dstRowBytes,        // r7: row pitch applied to dst
  26.     register unsigned short bytes,            // r8: bytes moved per row
  27.     register unsigned short rows )            // r9: number of rows
  28. {
       // Names for the incoming argument registers.
  29. #define    r_src                r3
  30. #define    r_dst                r4
  31. #define    r_bkg                r5
  32. #define    r_srcRowBytes        r6
  33. #define    r_dstRowBytes        r7
  34. #define    r_width                r8
  35. #define    r_height            r9
  36.  
       // Working registers r27..r31; they are saved on entry and restored on exit.
  37. #define    r_srcStride            r31
  38. #define    r_dstStride            r30
  39. #define    r_bytes                r29
  40. #define    r_blocks            r28
  41. #define    r_y                    r27
  42.  
       // Five 4-byte registers (r27..r31) are stored by stmw / reloaded by lmw.
  43. #define    kRegisterSaveStack    (5 * 4)
  44.  
  45.     ASM_BEGIN
           // Save r27..r31 at a negative offset from SP; SP itself is not changed.
  46.     stmw     r27,-kRegisterSaveStack(SP)
  47.  
  48.     mr        r_bytes,r_width
  49.     mr        r_y,r_height
  50.  
           // Distance from the end of one row to the start of the next.
  51.     sub        r_srcStride,r_srcRowBytes,r_width
  52.     sub        r_dstStride,r_dstRowBytes,r_width
  53.  
           // r0 = (-dst) & 31: bytes from dst up to the next 32-byte boundary (pre count).
  54.     neg        r0,r4
  55.     rlwinm    r0,r0,0,27,31
           // r_bytes = bytes left after the pre stage.
  56.     sub        r_bytes,r_bytes,r0
  57.  
           // Bias all three pointers by -8 so every access below uses a displacement
           // of 8 or more and the update forms (lfdu/stfdu at 32) step by one block.
  58.     subi    r_src,r_src,8
  59.     subi    r_dst,r_dst,8
  60.     subi    r_bkg,r_bkg,8
  61.     
           // r_blocks = r_bytes >> 5 (32-byte blocks); the record form sets cr0 for the crnor below.
           // r_bytes  = r_bytes & 31 (post count).
  62.     rlwinm.  r_blocks,r_bytes,27,5,31
  63.     rlwinm   r_bytes,r_bytes,0,27,31
  64.  
           // Condition-register bit numbers used as per-stage flags (all in cr5..cr7).
  65.     #define    FLAG_BLOCKS            20
  66. //    #define    FLAG_FREE            21    // note: free for use
  67.  
  68.     #define    FLAG_PRE1            26    
  69.     #define    FLAG_PRE2            25    
  70.     #define    FLAG_PRE4            24    
  71.     #define    FLAG_PRE8            23    
  72.     #define    FLAG_PRE16            22        
  73.  
  74.     #define    FLAG_POST1            31
  75.     #define    FLAG_POST2            30
  76.     #define    FLAG_POST4            29
  77.     #define    FLAG_POST8            28    
  78.     #define    FLAG_POST16            27
  79.  
               // Move the 5-bit pre count up into bits 22..26, insert the 5-bit post
               // count into bits 27..31, then copy bits 20..31 of r0 into cr5..cr7.
  80.         rlwinm    r0,r0,27-FLAG_PRE16,FLAG_PRE16,FLAG_PRE1
  81.         rlwimi    r0,r_bytes,27-FLAG_POST16,FLAG_POST16,FLAG_POST1
  82.         mtcrf    0x07,r0            // cr5 | cr6 | cr7
               // FLAG_BLOCKS = NOT EQ of the rlwinm. above, i.e. set when r_blocks != 0
               // (CR_NO / CR_EQ come from BlitPixieAsm.h; presumably they index cr0 EQ here).
  83.         crnor    FLAG_BLOCKS,0*CR_NO + CR_EQ,0*CR_NO + CR_EQ
  84.         
  85.     @rowloop:
  86.         mtctr    r_blocks            // block count for the bdnz loop below
  87.             
  88.             // copy pre-block
               // Each piece: load from src and bkg, store the src value to dst, store
               // the bkg value over src. r3 is advanced before the store back to src,
               // so that store uses a displacement reduced by the piece size.
               // IF_NOT comes from BlitPixieAsm.h; presumably it skips when the flag bit is clear.
  89.         bc        IF_NOT,FLAG_PRE1,@skip_pre1
  90.  
  91.         lbz        r0,8(r3)
  92.         lbz        r10,8(r5)
  93.         addi    r3,r3,1
  94.         stb        r0,8(r4)
  95.         addi    r5,r5,1
  96.         stb        r10,7(r3)            // same src byte that was loaded (r3 already +1)
  97.         addi    r4,r4,1 
  98.         
  99.     @skip_pre1:
  100.         bc        IF_NOT,FLAG_PRE2,@skip_pre2
  101.  
  102.         lhz        r0,8(r3)
  103.         lhz        r10,8(r5)
  104.         addi    r3,r3,2
  105.         sth        r0,8(r4)
  106.         addi    r5,r5,2
  107.         sth        r10,6(r3)            // same src halfword that was loaded (r3 already +2)
  108.         addi    r4,r4,2
  109.         
  110.     @skip_pre2:
  111.         bc        IF_NOT,FLAG_PRE4,@skip_pre4
  112.  
  113.         lwz        r0,8(r3)
  114.         lwz        r10,8(r5)
  115.         addi    r3,r3,4
  116.         stw        r0,8(r4)
  117.         addi    r5,r5,4
  118.         stw        r10,4(r3)            // same src word that was loaded (r3 already +4)
  119.         addi    r4,r4,4
  120.         
  121.     @skip_pre4:
  122.         bc        IF_NOT,FLAG_PRE8,@skip_pre8
  123.  
                // 8-byte pieces go through floating-point registers (lfd/stfd).
  124.         lfd        fp1,8(r3)
  125.         lfd        fp2,8(r5)
  126.         addi    r3,r3,8
  127.         stfd    fp1,8(r4)
  128.         addi    r5,r5,8
  129.         stfd    fp2,0(r3)            // same src doubleword that was loaded (r3 already +8)
  130.         addi    r4,r4,8
  131.         
  132.     @skip_pre8:
  133.         bc        IF_NOT,FLAG_PRE16,@skip_pre16
  134.         
  135.         lfd        fp1,8(r3)
  136.         lfd        fp2,16(r3)
  137.         lfd        fp3,8(r5)
  138.         lfd        fp4,16(r5)
  139.         addi    r3,r3,16
  140.         stfd    fp1,8(r4)
  141.         stfd    fp2,16(r4)
  142.         addi    r5,r5,16
  143.         stfd    fp3,-8(r3)            // the two src doublewords that were loaded (r3 already +16)
  144.         stfd    fp4,0(r3)
  145.         addi    r4,r4,16
  146.         
  147.     @skip_pre16:
  148.  
  149.             // copy blocks
  150.         bc        IF_NOT,FLAG_BLOCKS,@skipblockloop
                // NOTE(review): r0 is not read again before it is reloaded below;
                // this li looks like a leftover.
  151.         li        r0,8
  152.         
                // One 32-byte block per pass: bkg -> fp1..fp4, src -> fp5..fp8, then
                // fp1..fp4 -> src and fp5..fp8 -> dst. The last access of each group
                // uses the update form, which advances that pointer by 32.
  153.     @blockloop:
  154.         lfd        fp1,8(r5)
  155.         lfd        fp2,16(r5)
  156.         lfd        fp3,24(r5)
  157.         lfdu    fp4,32(r5)
  158.  
  159.         lfd        fp5,8(r3)
  160.         lfd        fp6,16(r3)
  161.         lfd        fp7,24(r3)
  162.         lfd        fp8,32(r3)
  163.  
  164.         
  165.         stfd    fp1,8(r3)
  166.         stfd    fp2,16(r3)
  167.         stfd    fp3,24(r3)
  168.         stfdu    fp4,32(r3)
  169.         
  170.         stfd    fp5,8(r4)
  171.         stfd    fp6,16(r4)
  172.         stfd    fp7,24(r4)
  173.         stfdu    fp8,32(r4)
  174.     
  175.         bdnz    @blockloop
  176.     @skipblockloop:
  177.  
                // Count the row down here; the cr0 result is consumed by the bne at
                // the bottom of the row loop. The post-stage instructions in between
                // (loads, stores, addi, add, bc) do not write cr0.
  178.         subic.   r_y,r_y,1
  179.         
  180.             // copy post-block
                // Same load / store / store-back pattern as the pre stage, largest piece first.
  181.         bc        IF_NOT,FLAG_POST16,@skip_post16
  182.         
  183.         lfd        fp1,8(r3)
  184.         lfd        fp2,16(r3)
  185.         lfd        fp3,8(r5)
  186.         lfd        fp4,16(r5)
  187.         addi    r3,r3,16
  188.         stfd    fp1,8(r4)
  189.         stfd    fp2,16(r4)
  190.         addi    r5,r5,16
  191.         stfd    fp3,-8(r3)
  192.         stfd    fp4,0(r3)
  193.         addi    r4,r4,16
  194.         
  195.     @skip_post16:
  196.         bc        IF_NOT,FLAG_POST8,@skip_post8
  197.  
  198.         lfd        fp1,8(r3)
  199.         lfd        fp2,8(r5)
  200.         addi    r3,r3,8
  201.         stfd    fp1,8(r4)
  202.         addi    r5,r5,8
  203.         stfd    fp2,0(r3)
  204.         addi    r4,r4,8
  205.         
  206.     @skip_post8:
  207.         bc        IF_NOT,FLAG_POST4,@skip_post4
  208.  
  209.         lwz        r0,8(r3)
  210.         lwz        r10,8(r5)
  211.         addi    r3,r3,4
  212.         stw        r0,8(r4)
  213.         addi    r5,r5,4
  214.         stw        r10,4(r3)
  215.         addi    r4,r4,4
  216.         
  217.     @skip_post4:
  218.         bc        IF_NOT,FLAG_POST2,@skip_post2
  219.  
  220.         lhz        r0,8(r3)
  221.         lhz        r10,8(r5)
  222.         addi    r3,r3,2
  223.         sth        r0,8(r4)
  224.         addi    r5,r5,2
  225.         sth        r10,6(r3)
  226.         addi    r4,r4,2
  227.         
  228.     @skip_post2:
  229.         bc        IF_NOT,FLAG_POST1,@skip_post1
  230.  
  231.         lbz        r0,8(r3)
  232.         lbz        r10,8(r5)
  233.         addi    r3,r3,1
  234.         stb        r0,8(r4)
  235.         addi    r5,r5,1
  236.         stb        r10,7(r3)
  237.         addi    r4,r4,1
  238.         
  239.     @skip_post1:
  240.  
                // Step to the next row; bkg uses the same stride as src.
  241.         add        r3,r3,r_srcStride
  242.         add        r4,r4,r_dstStride
  243.         add        r5,r5,r_srcStride
  244.         
            // Loop while the subic. above left r_y non-zero.
  245.     bne            @rowloop
  246.  
            // Restore r27..r31 from where stmw stored them.
  247.     lmw     r27,-kRegisterSaveStack(SP)
  248.     ASM_END
  249. }
  250.  
  251. #pragma mark *** 680x0 asm : 
  252. #elif USE_68K_ASSEMBLY
  253.  
  254. ASM_FUNC void BlitPixieBlitErase(
  255.     unsigned char *src,
  256.     unsigned char *dst,
  257.     unsigned char *bkg,
  258.     unsigned long srcRowBytes,
  259.     unsigned long dstRowBytes,
  260.     unsigned short bytes,
  261.     unsigned short rows)
  262. {
  263.     #define D_color            D2
  264.     #define D_bytes            D3
  265.     #define D_rows            D4
  266.     #define D_srcBytes        D5
  267.     #define D_dstBytes        D6
  268.     
  269.     ASM_BEGIN
  270.  
  271.     MOVEM.L      D3-D6/A2-A3,-(SP)
  272.  
  273.     MOVE.L      src,A0
  274.     MOVE.L      dst,A1
  275.     MOVE.L      bkg,A2
  276.     MOVE.L      srcRowBytes,D_srcBytes
  277.     MOVE.L      dstRowBytes,D_dstBytes
  278.     MOVE.W      bytes,D_bytes
  279.     MOVE.W      rows,D_rows
  280.     
  281.     EXT.L      D_bytes
  282.     SUB.L      D_bytes,D_srcBytes
  283.     SUB.L      D_bytes,D_dstBytes
  284.      
  285.  //    *** LOOP SETUP ***
  286.     MOVEQ     #15,D0
  287.     CLR.L      D1
  288.  
  289.     MOVE.W    D_bytes,D1
  290.     LSR.W     #2,D1            //    / sizeof(long)
  291.     AND.W      D0,D1
  292.     LSR.W     #2,D1            //    * sizeof(MOVE.L (A0),(A1)+; MOVE.L (A2)+,(A0)+)
  293.     LEA          @loopend,A3
  294.     SUBA.L      D1,A3    
  295.  
  296.     MOVE.W    D_bytes,D1
  297.     LSR.W       #6,D1
  298.  
  299. // *** COPY ***
  300.  
  301. @rowloop:
  302.  
  303.              // align to word boundary
  304.             // main word copy loop
  305.         MOVE.W    D1,D0
  306.            JMP        (A3)
  307.    @loopstart:
  308.         MOVE.L    (A0),(A1)+
  309.         MOVE.L    (A2)+,(A0)+
  310.         MOVE.L    (A0),(A1)+
  311.         MOVE.L    (A2)+,(A0)+
  312.         MOVE.L    (A0),(A1)+
  313.         MOVE.L    (A2)+,(A0)+
  314.         MOVE.L    (A0),(A1)+
  315.         MOVE.L    (A2)+,(A0)+
  316.         MOVE.L    (A0),(A1)+
  317.         MOVE.L    (A2)+,(A0)+
  318.         MOVE.L    (A0),(A1)+
  319.         MOVE.L    (A2)+,(A0)+
  320.         MOVE.L    (A0),(A1)+
  321.         MOVE.L    (A2)+,(A0)+
  322.         MOVE.L    (A0),(A1)+
  323.         MOVE.L    (A2)+,(A0)+
  324.         MOVE.L    (A0),(A1)+
  325.         MOVE.L    (A2)+,(A0)+
  326.         MOVE.L    (A0),(A1)+
  327.         MOVE.L    (A2)+,(A0)+
  328.         MOVE.L    (A0),(A1)+
  329.         MOVE.L    (A2)+,(A0)+
  330.         MOVE.L    (A0),(A1)+
  331.         MOVE.L    (A2)+,(A0)+
  332.         MOVE.L    (A0),(A1)+
  333.         MOVE.L    (A2)+,(A0)+
  334.         MOVE.L    (A0),(A1)+
  335.         MOVE.L    (A2)+,(A0)+
  336.         MOVE.L    (A0),(A1)+
  337.         MOVE.L    (A2)+,(A0)+
  338.         MOVE.L    (A0),(A1)+
  339.         MOVE.L    (A2)+,(A0)+
  340.     @loopend:
  341.            DBRA    D0,@loopstart
  342.  
  343.               // do left-overs
  344.          MOVE.W    D_bytes,D0
  345.           ANDI.W    #2,D0
  346.           BEQ.S    @restword
  347.           MOVE.W    (A0),(A1)+
  348.           MOVE.W    (A2)+,(A0)+
  349.       @restword:
  350.         MOVE.W    D_bytes,D0
  351.           ANDI.W    #1,D0
  352.           BEQ.S    @restbyte
  353.           MOVE.B    (A0),(A1)+
  354.           MOVE.B    (A2)+,(A0)+
  355.       @restbyte:
  356.      
  357.     ADDA.L     D_srcBytes,A0
  358.     ADDA.L     D_srcBytes,A2
  359.     ADDA.L     D_dstBytes,A1
  360.  
  361.     SUBQ.W     #1,D_rows
  362.     BNE.S      @rowloop
  363.  
  364.     MOVEM.L      (SP)+,D3-D6/A2-A3
  365.  
  366.     ASM_END
  367. }
  368.  
  369. #pragma mark *** Generic C : 
  370. #elif USE_GENERIC_C
  371.  
  // BlitPixieBlitErase (generic C version).
  //
  // Row by row: the first BlitPixieMemCopy call is given dst, src and bytes,
  // the second is given src, bkg and bytes (presumably destination, source,
  // count - BlitPixieMemCopy is declared elsewhere). After each row the src
  // and bkg cursors advance by srcRowBytes and the dst cursor by dstRowBytes.
  // Both rows and bytes are asserted to be non-zero.
  372. void BlitPixieBlitErase(
  373.     unsigned char *src,
  374.     unsigned char *dst,
  375.     unsigned char *bkg,
  376.     unsigned long srcRowBytes,
  377.     unsigned long dstRowBytes,
  378.     unsigned short bytes,
  379.     unsigned short rows)
  380. {
           unsigned char *liveRow = src;       // row being saved and then overwritten
           unsigned char *saveRow = dst;       // row that receives the saved bytes
           unsigned char *cleanRow = bkg;      // row written back over liveRow
           unsigned short rowIndex;

  381.     BLITPIXIE_ASSERT(rows > 0 );
  382.     BLITPIXIE_ASSERT(bytes > 0 );
  383.     
  384.     for (rowIndex = 0; rowIndex < rows; rowIndex++)
  385.     {
  386.         BlitPixieMemCopy( saveRow, liveRow, bytes );
  387.         BlitPixieMemCopy( liveRow, cleanRow, bytes );
  388.         liveRow += srcRowBytes;
  389.         saveRow += dstRowBytes;
  390.         cleanRow += srcRowBytes;
  391.     }
  392. }
  393.  
  394. #endif // USE_PPC_ASSEMBLY / USE_68K_ASSEMBLY / USE_GENERIC_C
  395.  
  396.